// nysiis.cpp
// NYSIIS translation routine
//
// This C++ implementation by Michael Coles, MCDBA
// 8/2005
//
// Updated 8/8/2005:	Fix to properly handle PH, SCH and EV multi-character 
//						NYSIIS encodings.
#include <stdafx.h>
#include <nysiis.h>
// UpperCaseWord
// Converts the lowercase ASCII letters 'a'..'z' in a buffer to their
// uppercase equivalents, in place. Any other byte value is left unchanged.
//
// Parameters:
//		BYTE *word		Pointer to the first byte of the word to convert
//		ULONG length	Number of bytes in the word
//
void nysiis::UpperCaseWord (BYTE *word, ULONG length) {
	for (ULONG pos = 0; pos < length; ++pos) {
		BYTE letter = word[pos];
		if (letter >= 'a' && letter <= 'z')
			letter = letter - 'a' + 'A';	// shift into the 'A'..'Z' range
		word[pos] = letter;
	}
}
// CheckStart
// Applies the NYSIIS prefix rules to the start of a word, in place. The
// comparisons are against uppercase letters only, so the word must already
// be uppercased. Every rule overwrites a prefix with a code of the same
// size, so the length of the word is never changed:
//
//		SCH      -> SSS
//		MAC      -> MCC
//		PF, PH   -> FF
//		KN       -> NN
//		K        -> C
//
// Parameters:
//		BYTE *word		A pointer to the word to check
//		ULONG length	The length of the word to check (passed by value)
//
void nysiis::CheckStart (BYTE *word, ULONG length) {
	// An empty word has no first character to inspect.
	if (length == 0)
		return;

	const BYTE first = word[0];
	if (first == 'S') {
		if (length > 2 && word[1] == 'C' && word[2] == 'H') {
			word[1] = 'S';
			word[2] = 'S';
		}
	} else if (first == 'M') {
		if (length > 2 && word[1] == 'A' && word[2] == 'C') {
			word[1] = 'C';
		}
	} else if (first == 'P') {
		// Both alternatives look at the SECOND character. Only a length of
		// 2 is guaranteed here, so word[2] must not be read.
		if (length > 1 && (word[1] == 'F' || word[1] == 'H')) {
			word[0] = 'F';
			word[1] = 'F';
		}
	} else if (first == 'K') {
		if (length > 1 && word[1] == 'N') {
			word[0] = 'N';
			word[1] = 'N';
		} else {
			// A lone 'K', or 'K' followed by anything other than 'N'.
			word[0] = 'C';
		}
	}
}
// CheckEnd
// Applies the NYSIIS suffix rules to the last two characters of a word, in
// place. The comparisons are against uppercase letters only. Words of two
// characters or fewer are left untouched.
//
//		EE, IE               -> Y
//		DT, RT, RD, NT, ND   -> D
//
// When a rule matches, the two-character suffix collapses to one character:
// the second-to-last byte receives the replacement, the last byte is
// overwritten with a space, and *length is reduced by one.
//
// Parameters:
//		BYTE *word		A pointer to the word
//		ULONG *length	In/out: the length of the word; decremented by one
//						when a suffix rule is applied
//
void nysiis::CheckEnd (BYTE *word, ULONG *length) {
	if (*length <= 2)
		return;

	const ULONG last = *length - 1;
	const BYTE tail = word[last];
	BYTE replacement = 0;		// 0 means "no suffix rule matched"

	switch (word[last - 1]) {
	case 'E':
	case 'I':
		if (tail == 'E')
			replacement = 'Y';
		break;
	case 'D':
		if (tail == 'T')
			replacement = 'D';
		break;
	case 'R':
	case 'N':
		if (tail == 'T' || tail == 'D')
			replacement = 'D';
		break;
	default:
		break;
	}

	if (replacement != 0) {
		word[last - 1] = replacement;
		word[last] = ' ';
		// Parenthesised on purpose: "*length--" would parse as
		// "*(length--)" and move the pointer instead of changing the
		// caller's length.
		--(*length);
	}
}
// IsVowel
// Tests whether a character is one of the five uppercase vowels.
// Lowercase letters and 'Y' are not treated as vowels.
//
// Parameters:
//		BYTE c			The character to test
//
// Results:
//		bool			true when c is 'A', 'E', 'I', 'O' or 'U'
//
bool nysiis::IsVowel (BYTE c) {
	switch (c) {
	case 'A':
	case 'E':
	case 'I':
	case 'O':
	case 'U':
		return true;
	default:
		return false;
	}
}
// IsAlpha
// Tests whether a character is an uppercase alphabetic character.
// Lowercase letters are rejected.
//
// Parameters:
//		BYTE c			The character to test
//
// Results:
//		bool			true when c lies in the range 'A'..'Z' inclusive
//
bool nysiis::IsAlpha (BYTE c) {
	if (c < 'A')
		return false;
	return c <= 'Z';
}
// Translate
// Translates a word to NYSIIS phonetic encoding
//
// The input word is modified in place: it is uppercased and then rewritten
// by CheckStart and CheckEnd. Its first character is copied to the output
// unchanged. Each following character is mapped through the rules below; a
// mapped character is appended only when it is 'A'..'Z' and differs from
// the last character already written. Finally a trailing 'S' is removed, a
// trailing "AY" becomes "Y", and a trailing 'A' is removed.
//
// If the result is shorter than 10 characters, the output buffer is padded
// with spaces up to 10 bytes. The padding is not counted in the return
// value. No truncation or bounds checking is done, so the caller must
// provide room for at least max(10, word_length) bytes.
//
// Parameters:
//		BYTE *word			Pointer to the word passed in (modified in place)
//		BYTE *encoded		Pointer to the NYSIIS encoded word returned
//		ULONG word_length	The length of the word passed in
//
// Results:
//		ULONG				Number of encoded characters written to encoded,
//							excluding padding; 0 when word_length is 0
//
ULONG nysiis::Translate (BYTE *word, BYTE *encoded, ULONG word_length) {
	const ULONG pad_width = 10;		// output is space-padded to this width
	ULONG enc_length = 0;

	if (word_length > 0) {
		UpperCaseWord (word, word_length);
		CheckStart (word, word_length);
		CheckEnd (word, &word_length);

		// The first character is always kept as it is.
		*(encoded) = *(word);
		enc_length = 1;

		ULONG i = 1;
		while (i < word_length) {
			// Characters left from position i to the end, including i.
			// Look-ahead past the end of the word yields a space so that
			// no byte outside the buffer is ever read.
			const ULONG remaining = word_length - i;
			const BYTE prevchar = *(word + i - 1);
			const BYTE currentchar = *(word + i);
			const BYTE nextchar = (remaining > 1) ? *(word + i + 1) : ' ';
			const BYTE nextnextchar = (remaining > 2) ? *(word + i + 2) : ' ';
			BYTE enc_char = ' ';

			if (currentchar == 'E' && nextchar == 'V') {
				// NOTE(review): the commonly published NYSIIS rule maps
				// "EV" to "AF"; this emits 'F' only. Left unchanged because
				// altering it would change previously generated codes.
				enc_char = 'F';
				i += 1;			// consume the 'V' as well
			} else if (IsVowel(currentchar)) {
				enc_char = 'A';
			} else if (currentchar == 'Q') {
				enc_char = 'G';
			} else if (currentchar == 'Z') {
				enc_char = 'S';
			} else if (currentchar == 'M') {
				enc_char = 'N';
			} else if (currentchar == 'K' && nextchar == 'N') {
				enc_char = 'N';
				i += 1;			// consume the 'N' as well
			} else if (currentchar == 'K') {
				enc_char = 'C';
			} else if (currentchar == 'S' && nextchar == 'C' && nextnextchar == 'H') {
				enc_char = 'S';
				i += 2;			// consume the "CH" as well
			} else if (currentchar == 'P' && nextchar == 'H') {
				enc_char = 'F';
				i += 1;			// consume the 'H' as well
			} else if (currentchar == 'H') {
				// 'H' survives only when it sits between two vowels.
				if (IsVowel(prevchar) && IsVowel(nextchar)) {
					enc_char = currentchar;
				} else {
					enc_char = 'A';
				}
			} else if (currentchar == 'W' && (IsVowel(prevchar))) {
				enc_char = 'A';
			} else {
				enc_char = currentchar;
			}

			// Skip non-letters and collapse runs of the same code.
			if (enc_char != *(encoded + enc_length - 1) && IsAlpha(enc_char))
			{
				*(encoded + enc_length) = enc_char;
				enc_length++;
			}
			i++;
		}

		// Trailing 'S' is dropped.
		if (enc_length > 1 && *(encoded + enc_length - 1) == 'S')
			enc_length--;
		// Trailing "AY" becomes "Y".
		if (enc_length > 2 && *(encoded + enc_length - 2) == 'A' && *(encoded + enc_length - 1) == 'Y')
		{
			*(encoded + enc_length - 2) = 'Y';
			*(encoded + enc_length - 1) = ' ';
			enc_length--;
		}
		// Trailing 'A' is dropped.
		if (enc_length > 1 && *(encoded + enc_length - 1) == 'A')
			enc_length--;
	}

	// Blank out everything after the code, up to the fixed width. For an
	// empty word this fills the whole field, so no uninitialised byte is
	// ever reported as output.
	if (enc_length < pad_width)
		memset (encoded + enc_length, ' ', pad_width - enc_length);
	return enc_length;
}